# Attach the bigMap package (ptSNE implementation used throughout this script).
library(bigMap)
# Load additional helper code from a local script.
# NOTE(review): presumably this is where the omics.* helpers used further down
# are defined -- confirm against bdm_xtrs.R.
source('~/bigMap/bdm_xtrs.R')
# Select the local folder for this experiment; the call echoes the path with a
# trailing slash (see the printed output right below).
# NOTE(review): exact semantics of bdm.mybdm() are not visible here -- confirm.
bdm.mybdm('~/omics/10xmouse/bm350')
## [1] "~/omics/10xmouse/bm350/"

bigMap::ptsne

# Load the complete dataset into memory from the plain .csv copy.
D <- read.csv(file = '~/omics/10xmouse/10x1M.csv')
# Column 52 carries the labels: turn them into numeric level codes.
L <- as.numeric(as.factor(D[[52]]))
# Create the bigMap instance that the ptSNE runs on the HPC start from.
# Note: the input data is handed over as a file name (gzipped .csv), not as
# an in-memory object.
m <- bdm.init('10x1M', '~/omics/10xmouse/10x1M.csv.gz')

perplexity 50

# bigMap::ptsne with perplexity 50; run on HPC platform
m.50 <- bdm.ptsne(
  m,
  threads = 100, type = 'MPI', layers = 2, boost = 3, ppx = 50
)
# bigMap k-ary neighbourhood preservation; run on HPC platform
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
m.50 <- bdm.qlty(
  m.50,
  inp.data = '~/omics/10xmouse/10x1M.csv.gz',
  threads = 60, type = 'SOCK', layers = 2,
  ret.qlty = TRUE, qm = 'kn'
)
# transfer to local machine
bdm.scp(m.50)
+++ saved to ~/omics/10xmouse/10x1M_z100_l2_b3_r1_p50.RData

perplexity 100

# ptSNE with perplexity 100; run on HPC platform
m.100 <- bdm.ptsne(
  m,
  threads = 100, type = 'MPI', layers = 2, boost = 3, ppx = 100
)
# k-ary neighbourhood preservation (qm = 'kn'); run on HPC platform
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
m.100 <- bdm.qlty(
  m.100,
  inp.data = '~/omics/10xmouse/10x1M.csv.gz',
  threads = 60, type = 'SOCK', layers = 2,
  ret.qlty = TRUE, qm = 'kn'
)
# transfer to local machine
bdm.scp(m.100)
+++ saved to ~/omics/10xmouse/10x1M_z100_l2_b3_r1_p100.RData

perplexity 200

# ptSNE with perplexity 200; run on HPC platform
m.200 <- bdm.ptsne(
  m,
  threads = 100, type = 'MPI', layers = 2, boost = 3, ppx = 200
)
# k-ary neighbourhood preservation (qm = 'kn'); run on HPC platform
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
m.200 <- bdm.qlty(
  m.200,
  inp.data = '~/omics/10xmouse/10x1M.csv.gz',
  threads = 60, type = 'SOCK', layers = 2,
  ret.qlty = TRUE, qm = 'kn'
)
# transfer to local machine
bdm.scp(m.200)
+++ saved to ~/omics/10xmouse/10x1M_z100_l2_b3_r1_p200.RData

perplexity 400

# ptSNE with perplexity 400; run on HPC platform
m.400 <- bdm.ptsne(
  m,
  threads = 100, type = 'MPI', layers = 2, boost = 3, ppx = 400
)
# k-ary neighbourhood preservation (qm = 'kn'); run on HPC platform
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
m.400 <- bdm.qlty(
  m.400,
  inp.data = '~/omics/10xmouse/10x1M.csv.gz',
  threads = 60, type = 'SOCK', layers = 2,
  ret.qlty = TRUE, qm = 'kn'
)
# transfer to local machine
bdm.scp(m.400)
+++ saved to ~/omics/10xmouse/10x1M_z100_l2_b3_r1_p400.RData

perplexity 800

# ptSNE with perplexity 800; run on HPC platform
m.800 <- bdm.ptsne(
  m,
  threads = 100, type = 'MPI', layers = 2, boost = 3, ppx = 800
)
# k-ary neighbourhood preservation (qm = 'kn'); run on HPC platform
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
m.800 <- bdm.qlty(
  m.800,
  inp.data = '~/omics/10xmouse/10x1M.csv.gz',
  threads = 60, type = 'SOCK', layers = 2,
  ret.qlty = TRUE, qm = 'kn'
)
# transfer to local machine
bdm.scp(m.800)
+++ saved to ~/omics/10xmouse/10x1M_z100_l2_b3_r1_p800.RData

perplexity 1600

# ptSNE with perplexity 1600; run on HPC platform
m.1600 <- bdm.ptsne(
  m,
  threads = 100, type = 'MPI', layers = 2, boost = 3, ppx = 1600
)
# k-ary neighbourhood preservation (qm = 'kn'); run on HPC platform
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
m.1600 <- bdm.qlty(
  m.1600,
  inp.data = '~/omics/10xmouse/10x1M.csv.gz',
  threads = 60, type = 'SOCK', layers = 2,
  ret.qlty = TRUE, qm = 'kn'
)
# transfer to local machine
bdm.scp(m.1600)
+++ saved to ~/omics/10xmouse/10x1M_z100_l2_b3_r1_p1600.RData

Outputs

# Class legend laid out with ncol = 8.
# NOTE(review): omics.class.legend() is not defined in this file; presumably it
# comes from the sourced helper script and draws the label legend -- confirm.
omics.class.legend(ncol = 8)

# Restore the saved run for every perplexity from the local bm350 folder.
# A plain top-level `for` is used (not lapply) so that load() keeps restoring
# the objects into the current environment, exactly like six separate calls.
for (ppx.i in c(50L, 100L, 200L, 400L, 800L, 1600L)) {
  load(sprintf('~/omics/10xmouse/bm350/10x1M_z100_l2_b3_r1_p%d.RData', ppx.i))
}
# Drop the loop index so no extra variable is left behind.
rm(ppx.i)

Precision (beta) qMaps

k-ary neighbourhood preservation

# Plot k-ary neighbourhood preservation for the six runs, ordered by
# increasing perplexity (50 to 1600).
# NOTE(review): presumably uses the quality measures attached to each run by
# bdm.qlty(..., qm = 'kn') -- confirm against the helper's definition.
omics.kn.plot(list(m.50, m.100, m.200, m.400, m.800, m.1600))

Running times

# Collect the six runs, ordered by increasing perplexity.
m.list <- list(m.50, m.100, m.200, m.400, m.800, m.1600)
# Per-run stage timings divided by 60 and rounded to whole units (reported as
# minutes in the caption, so m$t is presumably in seconds). Each run must
# provide exactly two timings (betas, ptsne): vapply() enforces that shape
# instead of silently returning a list when a run is malformed.
r.times <- vapply(
  m.list,
  function(m) round(unlist(m$t) / 60, 0),
  numeric(2)
)
# Append the per-run total, i.e. the sum of the already rounded stage times.
r.times <- rbind(r.times, colSums(r.times))
rownames(r.times) <- c('betas', 'ptsne', 'total')
# Column headers are the perplexities stored with each run.
colnames(r.times) <- vapply(m.list, function(m) m$Xbeta$ppx, numeric(1))
# Namespace-qualified calls, so the table renders without relying on
# kableExtra (and its `%>%` pipe) being attached.
kableExtra::kable_styling(
  knitr::kable(r.times, caption = 'Computation times (min)'),
  full_width = FALSE
)
Computation times (min)
50 100 200 400 800 1600
betas 1 1 1 1 3 3
ptsne 15 17 13 18 20 23
total 16 18 14 19 23 26

Discussion